An overview of R libraries to query Wikidata (27.1.2019)
https://www.lehir.net/how-to-query-wikidata-in-r/
=> https://www.lehir.net/how-to-query-wikidata-in-r/#summary
> WikidataR
https://github.com/Ironholds/WikidataR
https://cran.r-project.org/web/packages/WikidataR/index.html
https://cran.r-project.org/web/packages/WikidataR/WikidataR.pdf
> WikidataQueryServiceR
https://github.com/bearloga/WikidataQueryServiceR
https://cran.r-project.org/web/packages/WikidataQueryServiceR/index.html
https://cran.r-project.org/web/packages/WikidataQueryServiceR/WikidataQueryServiceR.pdf
> SPARQL
https://cran.r-project.org/web/packages/SPARQL/index.html
https://cran.r-project.org/web/packages/SPARQL/SPARQL.pdf
# Import libraries
# Note: tmap is not attached here; it is called further down via tmap:: and
# therefore only needs to be installed.
library(WikidataQueryServiceR) ## R wrapper for the Wikidata Query Service (WDQS), which provides a way for tools to query Wikidata via SPARQL.
library(SPARQL) ## Load SPARQL SELECT query result tables as a data frame, or UPDATE the triple store by connecting to an end-point over HTTP.
library(tidyverse) ## Collection of R packages designed for data science
library(sf) ## GIS vector library
library(stringr) ## The stringr package provides a cohesive set of functions designed to make working with strings as easy as possible
library(DT) ## Data objects in R can be rendered as HTML tables using the JavaScript library 'DataTables'
# Number formatting
# scipen: large penalty against scientific notation, so numbers print in
#         fixed notation.
# digits: number of significant digits used when printing.
options(scipen = 1000000, digits = 6)
# Source: https://www.pexels.com/photo/landscape-nature-love-water-68357/ (last accessed: 01.11.2019)
## WikidataQueryServiceR
# Run the query through the WikidataQueryServiceR wrapper and measure how
# long the round trip takes. The query selects items located in (P131*)
# wd:Q11943 that are an instance of (a subclass of) wd:Q33506 and have
# coordinates (P625), together with their German label, latitude and
# longitude. Presumably: museums in the canton of Zürich -- verify the Q-ids.
start.time <- Sys.time()
museum1_df <- WikidataQueryServiceR::query_wikidata(
  'SELECT DISTINCT ?item ?name ?coord ?lat ?lon
WHERE
{
hint:Query hint:optimizer "None" .
?item wdt:P131* wd:Q11943 .
?item wdt:P31/wdt:P279* wd:Q33506 .
?item wdt:P625 ?coord .
?item p:P625 ?coordinate .
?coordinate psv:P625 ?coordinate_node .
?coordinate_node wikibase:geoLatitude ?lat .
?coordinate_node wikibase:geoLongitude ?lon .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "de" .
?item rdfs:label ?name
}
}
ORDER BY ASC (?name)'
)
end.time <- Sys.time()
# Elapsed wall-clock time of the request.
time.taken <- difftime(end.time, start.time)
time.taken
## Time difference of 3.89218 secs
# Show the result as an interactive HTML table.
DT::datatable(museum1_df)
## SPARQL
# Same museum query as above, this time sent with the generic SPARQL package
# directly to the Wikidata endpoint, again timing the round trip.
start.time <- Sys.time()
endpoint <- "https://query.wikidata.org/sparql"
query <- 'SELECT DISTINCT ?item ?name ?coord ?lat ?lon
WHERE
{
hint:Query hint:optimizer "None" .
?item wdt:P131* wd:Q11943 .
?item wdt:P31/wdt:P279* wd:Q33506 .
?item wdt:P625 ?coord .
?item p:P625 ?coordinate .
?coordinate psv:P625 ?coordinate_node .
?coordinate_node wikibase:geoLatitude ?lat .
?coordinate_node wikibase:geoLongitude ?lon .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "de" .
?item rdfs:label ?name
}
}
ORDER BY ASC (?name) '
# An explicit user agent is passed to curl.
# NOTE(review): presumably the endpoint rejects requests without one -- confirm.
museum2 <- SPARQL::SPARQL(
  url = endpoint,
  query = query,
  curl_args = list(useragent = R.version.string)
)
# SPARQL() returns a list; the result table is its `results` element.
museum2_df <- museum2$results
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
## Time difference of 5.4926 secs
DT::datatable(museum2_df)
# http://yasgui.org/short/fg7fNak6G
## SPARQL
# Query the ld.geo.admin.ch endpoint for all features with feature code
# gn:A.ADM3 whose parent ADM1 is named "Zürich" and whose dct:issued date is
# 2019-01-01, returning name, population, bfs number and geometry as WKT.
endpoint <- "https://ld.geo.admin.ch/query"
# The xsd prefix is declared explicitly so the typed date literal in the
# FILTER does not depend on the endpoint predefining that prefix.
query <- 'PREFIX schema: <http://schema.org/>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX st: <https://ld.geo.admin.ch/def/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
select ?Municipality ?Name ?Population ?bfs ?WKT
where{
?Municipality gn:featureCode gn:A.ADM3 .
?Municipality schema:name ?Name .
?Municipality gn:population ?Population .
?Municipality st:bfsNumber ?bfs .
?Municipality dct:issued ?Date .
?Municipality gn:parentADM1 ?InCanton .
?InCanton schema:name ?CantonName .
?Municipality geo:hasGeometry ?Geometry .
?Geometry geo:asWKT ?WKT .
FILTER (?Date = "2019-01-01"^^xsd:date)
FILTER (?CantonName = "Zürich")
}'
municipality <- SPARQL::SPARQL(url = endpoint, query = query)
# SPARQL() returns a list; the result table is its `results` element.
municipality_df <- municipality$results
DT::datatable(municipality_df)
## museum
# Build point features from the lon/lat columns (declared as EPSG:4326) and
# reproject them to EPSG:2056.
museum_sf <- museum2_df %>%
  sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
  sf::st_transform(crs = 2056)
museum_sf
## Simple feature collection with 70 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 2670720 ymin: 1232780 xmax: 2702640 ymax: 1269530
## epsg (SRID): 2056
## proj4string: +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## First 10 features:
## item
## 1 <http://www.wikidata.org/entity/Q29574318>
## 2 <http://www.wikidata.org/entity/Q820463>
## 3 <http://www.wikidata.org/entity/Q820482>
## 4 <http://www.wikidata.org/entity/Q686324>
## 5 <http://www.wikidata.org/entity/Q18018874>
## 6 <http://www.wikidata.org/entity/Q22984572>
## 7 <http://www.wikidata.org/entity/Q1408868>
## 8 <http://www.wikidata.org/entity/Q1428489>
## 9 <http://www.wikidata.org/entity/Q1350100>
## 10 <http://www.wikidata.org/entity/Q18643284>
## name
## 1 "Anthropologisches Museum der Universität Zürich"@de
## 2 "Bergwerk Käpfnach"@de
## 3 "Bergwerk Riedhof"@de
## 4 "Botanischer Garten Zürich"@de
## 5 "Bruno Weber Park"@de
## 6 "FIFA World Football Museum"@de
## 7 "Festung Ebersberg"@de
## 8 "Flieger Flab Museum"@de
## 9 "Fotomuseum Winterthur"@de
## 10 "Fotostiftung Schweiz"@de
## coord
## 1 "Point(8.54734 47.39784)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 2 "Point(8.6131 47.2531)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 3 "Point(8.480582 47.287284)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 4 "Point(8.561105555 47.358647222)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 5 "Point(8.52 47.3132)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 6 "Point(8.531749 47.363509)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 7 "Point(8.57978 47.5709)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 8 "Point(8.630978 47.398044)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 9 "Point(8.739 47.496)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 10 "Point(8.7383 47.4958)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## geometry
## 1 POINT (2683697 1250260)
## 2 POINT (2688898 1234236)
## 3 POINT (2678821 1237900)
## 4 POINT (2684798 1245917)
## 5 POINT (2681765 1240821)
## 6 POINT (2682573 1246426)
## 7 POINT (2685866 1269534)
## 8 POINT (2690010 1250375)
## 9 POINT (2697985 1261396)
## 10 POINT (2697931 1261371)
## municipality
# Print the first raw WKT value as returned by the endpoint. The output below
# shows it wrapped in escaped double quotes and followed by a ^^<datatype>
# suffix, which is why it is cleaned before parsing.
municipality_df$WKT[1]
## [1] "\"POLYGON((8.7472484526186 47.63872793924,8.7486391301601 47.641149811125,8.7577368356457 47.643061593929,8.7685767608905 47.650196628528,8.7762917737639 47.648920693533,8.781588141026 47.652016689751,8.7859684265894 47.655186044437,8.7885443005018 47.665374089214,8.7985208138365 47.664987851757,8.8026998168341 47.664911855662,8.8052516080835 47.664728943846,8.8048497424831 47.652239999272,8.8264311573285 47.646749057178,8.8275410732979 47.640340464056,8.8254611361 47.637145182661,8.8202395346886 47.629300988547,8.8134041560736 47.627403298719,8.8117505999138 47.62263761347,8.8146307411499 47.621344212935,8.8115492253172 47.618970089637,8.8140050047976 47.61722298213,8.8049480396453 47.613705058578,8.8041113982792 47.608075113738,8.8030520312273 47.601017581333,8.7887165882117 47.603146215297,8.790274378792 47.608435051421,8.787831326216 47.609650813059,8.7620111618786 47.618079527492,8.7565250796051 47.61680174895,8.7494814441854 47.621387913033,8.7548587025099 47.623629698778,8.7533011512557 47.626813504064,8.7518081938657 47.628314538115,8.7446073461252 47.627676218296,8.7474707108511 47.632636025661,8.7444488263433 47.634747838992,8.7472484526186 47.63872793924))\"^^<http://www.openlinksw.com/schemas/virtrdf#Geometry>"
# Keep only the text between the first pair of double quotes, i.e. the bare
# WKT string (column 2 of str_match() is the first capture group).
municipality_df[["WKT_corrected"]] <- stringr::str_match(
  string = municipality_df[["WKT"]],
  pattern = '\"(.*?)\"'
)[, 2]
# Print the first cleaned value for comparison with the raw one.
municipality_df[["WKT_corrected"]][1]
## [1] "POLYGON((8.7472484526186 47.63872793924,8.7486391301601 47.641149811125,8.7577368356457 47.643061593929,8.7685767608905 47.650196628528,8.7762917737639 47.648920693533,8.781588141026 47.652016689751,8.7859684265894 47.655186044437,8.7885443005018 47.665374089214,8.7985208138365 47.664987851757,8.8026998168341 47.664911855662,8.8052516080835 47.664728943846,8.8048497424831 47.652239999272,8.8264311573285 47.646749057178,8.8275410732979 47.640340464056,8.8254611361 47.637145182661,8.8202395346886 47.629300988547,8.8134041560736 47.627403298719,8.8117505999138 47.62263761347,8.8146307411499 47.621344212935,8.8115492253172 47.618970089637,8.8140050047976 47.61722298213,8.8049480396453 47.613705058578,8.8041113982792 47.608075113738,8.8030520312273 47.601017581333,8.7887165882117 47.603146215297,8.790274378792 47.608435051421,8.787831326216 47.609650813059,8.7620111618786 47.618079527492,8.7565250796051 47.61680174895,8.7494814441854 47.621387913033,8.7548587025099 47.623629698778,8.7533011512557 47.626813504064,8.7518081938657 47.628314538115,8.7446073461252 47.627676218296,8.7474707108511 47.632636025661,8.7444488263433 47.634747838992,8.7472484526186 47.63872793924))"
# Parse the cleaned WKT strings into a geometry column declared as EPSG:4326.
municipality_sfc <- sf::st_as_sfc(municipality_df$WKT_corrected, crs = 4326)
# Combine the attribute columns (without the two WKT text columns) with the
# parsed geometries and reproject to EPSG:2056.
municipality_sf <- municipality_df %>%
  dplyr::select(-WKT, -WKT_corrected) %>%
  sf::st_sf(geometry = municipality_sfc) %>%
  sf::st_transform(crs = 2056)
municipality_sf
## Simple feature collection with 162 features and 4 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 2669260 ymin: 1223900 xmax: 2716910 ymax: 1283360
## epsg (SRID): 2056
## proj4string: +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## First 10 features:
## Municipality Name
## 1 <https://ld.geo.admin.ch/boundaries/municipality/292:2019> Stammheim
## 2 <https://ld.geo.admin.ch/boundaries/municipality/261:2019> Zürich
## 3 <https://ld.geo.admin.ch/boundaries/municipality/100:2019> Stadel
## 4 <https://ld.geo.admin.ch/boundaries/municipality/101:2019> Steinmaur
## 5 <https://ld.geo.admin.ch/boundaries/municipality/102:2019> Weiach
## 6 <https://ld.geo.admin.ch/boundaries/municipality/10:2019> Obfelden
## 7 <https://ld.geo.admin.ch/boundaries/municipality/111:2019> Bäretswil
## 8 <https://ld.geo.admin.ch/boundaries/municipality/112:2019> Bubikon
## 9 <https://ld.geo.admin.ch/boundaries/municipality/113:2019> Dürnten
## 10 <https://ld.geo.admin.ch/boundaries/municipality/114:2019> Fischenthal
## Population bfs geometry
## 1 2746 292 POLYGON ((2698340 1277272, ...
## 2 409241 261 POLYGON ((2682965 1242721, ...
## 3 2280 100 POLYGON ((2677040 1263227, ...
## 4 3482 101 POLYGON ((2678167 1260574, ...
## 5 1756 102 POLYGON ((2673733 1268649, ...
## 6 5356 10 POLYGON ((2673859 1233032, ...
## 7 5038 111 POLYGON ((2706232 1242705, ...
## 8 7200 112 POLYGON ((2703756 1233911, ...
## 9 7570 113 POLYGON ((2709905 1236485, ...
## 10 2512 114 POLYGON ((2711223 1241032, ...
# Plot result: R base plot
# Draw the municipality outlines first to set up the plot region ...
plot(st_geometry(municipality_sf))
# ... add the museums as small blue dots ...
plot(st_geometry(museum_sf), pch = 19, col = "blue", cex = 0.5, add = TRUE)
# ... and redraw the outlines so they lie on top of the points.
plot(st_geometry(municipality_sf), add = TRUE)
# Legend anchored at fixed map coordinates (same CRS as the plotted layers).
legend(
  x = 2708000, y = 1287500,
  legend = c("Museum", "Municipality"),
  lty = c(NA, 1),
  pch = c(19, NA),
  cex = .8,
  col = c("blue", "black"),
  bty = "n"
)
# Spatial Join: instead of joining data frames via an equal ID we join data
# frames based on an equal location.
# Match every museum point with the municipality polygon at its location and
# carry the municipality attributes over to the museum rows.
spjoin_sf <- sf::st_join(x = museum_sf, y = municipality_sf)
spjoin_sf
## Simple feature collection with 70 features and 7 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 2670720 ymin: 1232780 xmax: 2702640 ymax: 1269530
## epsg (SRID): 2056
## proj4string: +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## First 10 features:
## item
## 1 <http://www.wikidata.org/entity/Q29574318>
## 2 <http://www.wikidata.org/entity/Q820463>
## 3 <http://www.wikidata.org/entity/Q820482>
## 4 <http://www.wikidata.org/entity/Q686324>
## 5 <http://www.wikidata.org/entity/Q18018874>
## 6 <http://www.wikidata.org/entity/Q22984572>
## 7 <http://www.wikidata.org/entity/Q1408868>
## 8 <http://www.wikidata.org/entity/Q1428489>
## 9 <http://www.wikidata.org/entity/Q1350100>
## 10 <http://www.wikidata.org/entity/Q18643284>
## name
## 1 "Anthropologisches Museum der Universität Zürich"@de
## 2 "Bergwerk Käpfnach"@de
## 3 "Bergwerk Riedhof"@de
## 4 "Botanischer Garten Zürich"@de
## 5 "Bruno Weber Park"@de
## 6 "FIFA World Football Museum"@de
## 7 "Festung Ebersberg"@de
## 8 "Flieger Flab Museum"@de
## 9 "Fotomuseum Winterthur"@de
## 10 "Fotostiftung Schweiz"@de
## coord
## 1 "Point(8.54734 47.39784)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 2 "Point(8.6131 47.2531)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 3 "Point(8.480582 47.287284)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 4 "Point(8.561105555 47.358647222)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 5 "Point(8.52 47.3132)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 6 "Point(8.531749 47.363509)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 7 "Point(8.57978 47.5709)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 8 "Point(8.630978 47.398044)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 9 "Point(8.739 47.496)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 10 "Point(8.7383 47.4958)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## Municipality
## 1 <https://ld.geo.admin.ch/boundaries/municipality/261:2019>
## 2 <https://ld.geo.admin.ch/boundaries/municipality/295:2019>
## 3 <https://ld.geo.admin.ch/boundaries/municipality/1:2019>
## 4 <https://ld.geo.admin.ch/boundaries/municipality/261:2019>
## 5 <https://ld.geo.admin.ch/boundaries/municipality/131:2019>
## 6 <https://ld.geo.admin.ch/boundaries/municipality/261:2019>
## 7 <https://ld.geo.admin.ch/boundaries/municipality/23:2019>
## 8 <https://ld.geo.admin.ch/boundaries/municipality/191:2019>
## 9 <https://ld.geo.admin.ch/boundaries/municipality/230:2019>
## 10 <https://ld.geo.admin.ch/boundaries/municipality/230:2019>
## Name Population bfs geometry
## 1 Zürich 409241 261 POINT (2683697 1250260)
## 2 Horgen 22514 295 POINT (2688898 1234236)
## 3 Aeugst am Albis 1941 1 POINT (2678821 1237900)
## 4 Zürich 409241 261 POINT (2684798 1245917)
## 5 Adliswil 18803 131 POINT (2681765 1240821)
## 6 Zürich 409241 261 POINT (2682573 1246426)
## 7 Berg am Irchel 564 23 POINT (2685866 1269534)
## 8 Dübendorf 28141 191 POINT (2690010 1250375)
## 9 Winterthur 110912 230 POINT (2697985 1261396)
## 10 Winterthur 110912 230 POINT (2697931 1261371)
# Density calculation
# > 1. Count points per polygon
# The geometry is dropped *before* grouping: summarising an sf object would
# first union the point geometries of every group, only for that geometry to
# be discarded again before the join.
pts_count <- spjoin_sf %>%
  sf::st_set_geometry(NULL) %>%
  dplyr::group_by(bfs) %>%
  dplyr::summarise(count = dplyr::n())
# Municipalities without any museum get count = NA from the left join.
municipality_sf <- municipality_sf %>%
  dplyr::left_join(pts_count, by = "bfs")
# > 2. Calculate area of polygon
# as.vector() strips the units class so the column is plain numeric.
municipality_sf <- municipality_sf %>%
  dplyr::mutate(mun_area_m2 = as.vector(sf::st_area(.)))
# > 3. Calculate density: count/area
# Area is in m2, so the factor 1000000 expresses the density per km2.
# Municipalities with count = NA keep density = NA.
municipality_sf$density <-
  municipality_sf$count / municipality_sf$mun_area_m2 * 1000000
# Plot result: tmap
# > tmap static
# tmap_mode() has to be *called* to select a mode; referencing the bare
# function only prints its source. "plot" selects static maps ("view" would
# select interactive viewing).
tmap::tmap_mode("plot")
## tmap mode set to plotting
# Choropleth of museum density per municipality. Municipalities with
# density = NA are drawn in grey and labelled "No Museum" in the legend.
tmap::tm_shape(municipality_sf) +
  tmap::tm_fill(
    "density",
    title = "Density of museums",
    style = "quantile",
    palette = "BuGn",
    colorNA = "grey",
    textNA = "No Museum"
  ) +
  tmap::tm_borders() +
  tmap::tm_layout(
    frame = FALSE,
    legend.position = c("right", "top"),
    legend.outside = TRUE
  )
# Export Data as shp
# Write both layers to shapefiles in the working directory; delete_layer = TRUE
# is passed so an already existing layer is replaced.
sf::st_write(obj = museum_sf, dsn = "./museum.shp", delete_layer = TRUE)
## Writing layer `museum' to data source `./museum.shp' using driver `ESRI Shapefile'
## features: 70
## fields: 3
## geometry type: Point
sf::st_write(
  obj = municipality_sf,
  dsn = "./municipality.shp",
  delete_layer = TRUE
)
## Writing layer `municipality' to data source `./municipality.shp' using driver `ESRI Shapefile'
## features: 162
## fields: 7
## geometry type: Unknown (any)
# Thank you @csarasuagar! =D
# municipality
# https://w.wiki/BA8
# For items that are an instance of wd:Q70208, list every statement that has
# a reference URL (pr:P854) containing "statistik.zh.ch".
# NOTE(review): start.time is recorded but no elapsed time is computed in
# this chunk.
start.time <- Sys.time()
endpoint <- "https://query.wikidata.org/sparql"
query <- 'SELECT *
{
?ch wdt:P31 wd:Q70208 .
OPTIONAL {?ch wdt:P17 wd:Q39.}
?ch ?prop ?statement .
?statement prov:wasDerivedFrom ?refnode.
?refnode pr:P854 ?ref. #pr:P248 #pr:P854
FILTER (CONTAINS(str(?ref),"statistik.zh.ch"))
}
order by ?ch
'
source_municipality <- SPARQL::SPARQL(
  url = endpoint,
  query = query,
  curl_args = list(useragent = R.version.string)
)
# The result table is the `results` element of the returned list.
source_municipality_df <- source_municipality[["results"]]
DT::datatable(source_municipality_df)
# city
# https://w.wiki/BA7
# Same reference lookup as for the municipalities, but for items that are an
# instance of wd:Q54935504: list every statement that has a reference URL
# (pr:P854) containing "statistik.zh.ch".
# NOTE(review): start.time is recorded but no elapsed time is computed in
# this chunk.
start.time <- Sys.time()
endpoint <- "https://query.wikidata.org/sparql"
query <- 'SELECT *
WHERE
{
?ch wdt:P31 wd:Q54935504 .
OPTIONAL {?ch wdt:P17 wd:Q39.}
?ch ?prop ?statement .
?statement prov:wasDerivedFrom ?refnode.
?refnode pr:P854 ?ref. #pr:P248 #pr:P854
FILTER (CONTAINS(str(?ref),"statistik.zh.ch"))
}
order by ?ch
'
source_city <- SPARQL::SPARQL(
  url = endpoint,
  query = query,
  curl_args = list(useragent = R.version.string)
)
# The result table is the `results` element of the returned list.
source_city_df <- source_city[["results"]]
DT::datatable(source_city_df)